from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter, RecursiveCharacterTextSplitter

# Demo script: load one UTF-8 text file, cut it into small overlapping
# chunks, and print every resulting chunk.

# Path is relative, so it is resolved against the current working directory.
SOURCE_PATH = "../other/xiaomi.txt"

# Separator candidates handed to the splitter: paragraph break, line break,
# Chinese full stop, Chinese comma, space, and finally the empty string.
SEPARATORS = ["\n\n", "\n", "。", "，", " ", ""]

# Keyword arguments for the splitter, gathered in one place.
SPLITTER_OPTIONS = {
    "chunk_size": 100,
    "chunk_overlap": 20,
    "separators": SEPARATORS,
}

loader = TextLoader(SOURCE_PATH, encoding="utf-8")
document = loader.load()

# NOTE: an earlier variant used CharacterTextSplitter with the same
# chunk_size / chunk_overlap and a single separator="\n\n"; the recursive
# splitter below is the one actually in use.
text_split = RecursiveCharacterTextSplitter(**SPLITTER_OPTIONS)
documents = text_split.split_documents(document)

# Print each chunk object as-is (its default string form), one per line.
for content in documents:
    print(content)
